* Iris Arbogast (irisarbogast@gmail.com)
* 8/29/2023
/* This program imports and cleans data for the policy regressions including: 
- the policy changes and dates from the novel dataset we create
- state-year level population data from the ACS
- Medicaid.gov enrollment data
- the controls created in GetandCleanControls from R
- Premium, income eligibility and redetermination pause data from KFF Annual Surveys
*/

*************************
/* Import excel file I created with policy changes and dates, and transform it into a 
	file with 0s and then 1s after the policy change. */
*************************

import excel "Data/Policies.xls", sheet("PoliciesbyState") firstrow clear
replace Enddate = td(1jan2022) if Enddate == td(1jan2021)
gen StartMonth = mofd(Startdate)
gen EndMonth = mofd(Enddate)

foreach Policy in Adminburden	WorkRequirements MedicaidExpansion freq_check	str_check	automatic_drop {

foreach State in AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL IN KS KY LA MA MD ME MI MN MO MS MT NC ///
 ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT WA WI WV WY {

* Create variables in the dataset for each state and policy type
gen Start_`State'_`Policy' = StartMonth if (PostalCode == "`State'" & `Policy' == 1) 
gen End_`State'_`Policy' = EndMonth if (PostalCode == "`State'" & `Policy' == 1)

* Create locals
su Start_`State'_`Policy'
scalar Start_`State'_`Policy'_1 = r(min)
scalar Start_`State'_`Policy'_2 = r(max)

su End_`State'_`Policy', meanonly
scalar End_`State'_`Policy'_1 = r(min)
scalar End_`State'_`Policy'_2 = r(max)
}
}
 
* create panel dataset of the correct size
drop _all

set obs 51
gen var4 = _n
expand 144
bys var4 : gen date = 599 + _n

format date %tm

merge m:1 var4 using Data/statefips.dta
drop _merge

foreach Policy in Adminburden	WorkRequirements MedicaidExpansion freq_check	str_check	automatic_drop  {

gen `Policy' = 0

foreach State in AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL IN KS KY LA MA MD ME MI MN MO MS MT NC ///
 ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT WA WI WV WY {

replace `Policy' = 1 if (date > Start_`State'_`Policy'_1 - 1 & date < End_`State'_`Policy'_1 + 1 & stusps == "`State'")
replace `Policy' = 1 if (date > Start_`State'_`Policy'_2 - 1 & date < End_`State'_`Policy'_2 + 1 & stusps == "`State'")
}
}

rename stname sname

rename st statefip

gen year = 1960 + floor(date/12)

drop if year < 2008
drop if year > 2020


* create public charge variables

gen public_charge_2019 = 0
replace public_charge_2019 = 1 if date > 703


gen public_charge_2017 = 0
replace public_charge_2017 = 1 if date >= 684 


save Data/Clean/Cleaned_Policy_Data.dta, replace

*************************
* Use ACS datasets to add in population and poverty rate data
*************************


* Add in child population data
use Data/ACS.dta, clear
keep if age < 19 & !(missing(age))
collapse (sum) perwt, by(year statefip)
rename perwt pop
merge 1:m st year using Data/Clean/Cleaned_Policy_Data.dta 
drop _merge
save Data/Clean/Cleaned_Policy_Data.dta, replace


* Add in total population data
use Data/ACS.dta, clear
collapse (sum) perwt, by(year statefip)
rename perwt totpop
merge 1:m st year using Data/Clean/Cleaned_Policy_Data.dta 
drop _merge
save Data/Clean/Cleaned_Policy_Data.dta, replace


* Add in poverty control data
use Data/ACS.dta, clear
keep if age < 19 & !(missing(age))
drop if poverty == 0
gen pov = 0
replace pov = 1 if poverty < 100
collapse (mean) pov [pw = perwt], by(year statefip)
merge 1:m st year using Data/Clean/Cleaned_Policy_Data.dta 
drop _merge
save Data/Clean/Cleaned_Policy_Data.dta, replace


sort state date


*************************
* Add medicaid rates 
*************************
* Clean Medicaid.gov data, combining new and older datasets
use Data/Clean/Cleaned_Policy_Data.dta, clear
drop if missing(date)
drop if missing(sname)
merge 1:m sname date using Data/Enrollment/CMSEnrollment.dta
drop _merge

rename statefip st
save Data/Clean/Cleaned_Policy_Data.dta, replace


* Import Tennessee Data Separately
use Data/Enrollment/fte_20142021childTN.dta, clear
collapse (sum) child_enroll, by(year mo)
rename child_enroll child_enroll_TN
gen st = 47
merge 1:m st year mo using Data/Clean/Cleaned_Policy_Data.dta
drop _merge
replace mchip_child = child_enroll_TN if st == 47
save Data/Clean/Cleaned_Policy_Data.dta, replace


* Import Arizona Data
import excel Data/Enrollment/ArizonaData.xls, firstrow clear
rename Child_Enrollment child_enroll_AZ
merge 1:m st year mo using Data/Clean/Cleaned_Policy_Data.dta
drop _merge
replace mchip_child = child_enroll_AZ if st == 4
save Data/Clean/Cleaned_Policy_Data.dta, replace


* Import North Carolina Data
use Data/Enrollment/mcaidNC20142021.dta, clear
rename enroll_infchild child_enroll_NC
gen st = 37
gen mo = month
drop month
merge 1:m st year mo using Data/Clean/Cleaned_Policy_Data.dta
drop _merge
replace mchip_child = child_enroll_NC if st == 37
drop child_enroll_TN child_enroll_NC child_enroll_AZ

* Generate enrollment rate as enrollment over child population
gen mchiprate = mchip_child/pop

save Data/Clean/Cleaned_Policy_Data.dta, replace


*************************
* Add Premiums and Income Eligibility Limits from KFF data
*************************
* Create fiscal year variable to correctly merge in KFF data

/*Forty-six states began fiscal year 2021 on July 1, 2020. 
New York began its fiscal year on April 1, Texas begins its fiscal year on Sept. 1, and 
Alabama and Michigan begin theirs on Oct. 1. 
*/
use Data/Clean/Cleaned_Policy_Data.dta, clear
gen daydate =  dofm(date)
format daydate %d
replace mo = month(daydate)


gen fiscalyear = .
replace fiscalyear = year if mo < 7
replace fiscalyear = year + 1 if mo >= 7
replace fiscalyear = year + 1 if mo >= 4 & stusps == "NY"
sort stusps date
replace fiscalyear = year if mo < 10 & stusps == "AL"
replace fiscalyear = year if mo < 10 & stusps == "MI"
replace fiscalyear = year if mo < 11 & stusps == "TX"

* Now that we have fiscal year data, merge in premiums and eligibility cutoffs based on those years
merge m:1 stusps fiscalyear using Data/KFF/incomelims.dta
drop _merge

merge m:1 fiscalyear stusps using Data/KFF/premiums.dta
drop if _merge == 2
drop _merge
save Data/Clean/Cleaned_Policy_Data.dta, replace


use Data/KFF/waittime_CHIPftnts_s12.dta, clear
rename state stname
replace year = year +1
rename year fiscalyear
merge m:1 stname using Data/statefips.dta
drop _merge

merge 1:m stusps fiscalyear using Data/Clean/Cleaned_Policy_Data.dta
drop _merge


save Data/Clean/Cleaned_Policy_Data.dta, replace


use Data/KFF/imm_nowait.dta, clear
rename state stname
replace year = year +1
rename year fiscalyear
merge m:1 stname using Data/statefips.dta
drop _merge

merge 1:m stusps fiscalyear using Data/Clean/Cleaned_Policy_Data.dta
drop _merge
save Data/Clean/Cleaned_Policy_Data.dta, replace


use Data/KFF/waittime_CHIP.dta, clear
rename state stname
replace year = year +1
rename year fiscalyear
merge m:1 stname using Data/statefips.dta
drop _merge

gen waittime_CHIP01 = .
replace waittime_CHIP01= 0 if (waittime_CHIP~=.)
replace waittime_CHIP01= 1 if (waittime_CHIP>=1 & waittime_CHIP~=.)
label var waittime_CHIP01 "1 if CHIP waiting period; 0 if none"
merge 1:m stusps fiscalyear using Data/Clean/Cleaned_Policy_Data.dta
drop _merge

save Data/Clean/Cleaned_Policy_Data.dta, replace


use Data/KFF/presumpteligCHIP.dta, clear
rename state stname
replace year = year +1
rename year fiscalyear
merge m:1 stname using Data/statefips.dta
drop _merge

merge 1:m stusps fiscalyear using Data/Clean/Cleaned_Policy_Data.dta
drop _merge

save Data/Clean/Cleaned_Policy_Data.dta, replace


use Data/KFF/presumpteligMcaid.dta, clear
rename state stname
replace year = year +1
rename year fiscalyear
merge m:1 stname using Data/statefips.dta
drop _merge

merge 1:m stusps fiscalyear using Data/Clean/Cleaned_Policy_Data.dta
drop _merge

save Data/Clean/Cleaned_Policy_Data.dta, replace

use Data/KFF/realtimeelig.dta, clear
rename state stname
replace year = year +1
rename year fiscalyear
merge m:1 stname using Data/statefips.dta
drop _merge

merge 1:m stusps fiscalyear using Data/Clean/Cleaned_Policy_Data.dta
drop _merge

save Data/Clean/Cleaned_Policy_Data.dta, replace


use Data/KFF/autorenewal.dta, clear
rename state stname
replace year = year +1
rename year fiscalyear
merge m:1 stname using Data/statefips.dta
drop _merge

merge 1:m stusps fiscalyear using Data/Clean/Cleaned_Policy_Data.dta
drop _merge

save Data/Clean/Cleaned_Policy_Data.dta, replace

use Data/KFF/imm_nowait.dta, clear
rename state stname
replace year = year +1
rename year fiscalyear
merge m:1 stname using Data/statefips.dta
drop _merge

merge 1:m stusps fiscalyear using Data/Clean/Cleaned_Policy_Data.dta
drop _merge


* Recode variables for CHIP such that variables for CHIP in states that do not have CHIP use the Medicaid value

gen presumpteligCHIP2 = .
replace presumpteligCHIP2 = 1 if presumpteligCHIP == 1
replace presumpteligCHIP2 = 0 if presumpteligCHIP == 0
replace presumpteligCHIP2 = presumpteligMcaid if presumpteligCHIP == 2


gen imm_nowaitCHIP2 = .
replace imm_nowaitCHIP2 = 1 if imm_nowaitCHIP == 1
replace imm_nowaitCHIP2 = 0 if imm_nowaitCHIP == 0

replace imm_nowaitCHIP2 = imm_nowaitMdcd if imm_nowaitCHIP == 2

drop if missing(date)
drop if missing(st)


tsset st date

foreach variables in waittime_CHIP presumpteligCHIP2 presumpteligMcaid realtimeelig autorenewal imm_nowaitMdcd imm_nowaitCHIP2 waittime_CHIP01 {

replace `variables' = F12.`variables' if fiscalyear == 2014
replace `variables' = . if year == 2013 & fiscalyear == 2014
	
}

save Data/Clean/Cleaned_Policy_Data.dta, replace




*************************
* Redetermination Pause Variable and Controls from R
*************************
* Create redetermination variable
use Data/KFF/redet.dta, clear
merge 1:m sname year using Data/Clean/Cleaned_Policy_Data.dta 
drop _merge
sort date

* add COVID redetermination pause
replace new_redetpause = 1 if date > 721
replace new_redetpause = 0 if missing(new_redetpause)

save Data/Clean/Cleaned_Policy_Data.dta, replace


* Clean and add state-level controls data from FRED
insheet using "Data/Statecontrols.csv", clear
rename gsp GSP
drop v1 date
gen date = ym(year, mo)
format date %tm
drop year mo
merge 1:m stusps date using Data/Clean/Cleaned_Policy_Data.dta 
drop _merge


* Make GSP per capita and edit other units
replace GSP = (GSP*1000)/totpop
replace cutoffmax = cutoffmax*100
replace pov = pov*100

drop if missing(st)

* Add separate year, month, day variables for the R script
format date %tm 
gen date2 = dofm(date)
gen mo2 = month(date2)
gen year2 = year(date2)

drop if missing(date)

save Data/Clean/Cleaned_Policy_Data.dta, replace



*************************
* Impute mchiprate
*************************
use Data/Clean/Cleaned_Policy_Data.dta, clear

* Drop outlier months
sort st date
replace mchiprate = . if (date > tm(2016m9) & date < tm(2017m6) & stusps == "OH" & !missing(date))
replace mchiprate = . if !(date < tm(2016m6) | date > tm(2016m7) | stusps != "ND" | missing(date))
replace mchiprate = . if !((date != tm(2019m6) & date != tm(2019m11) & date != tm(2020m7)) | stusps != "NV" | missing(date))
replace mchiprate = . if !(date < tm(2017m6) | date > tm(2017m12) | stusps != "NM" | missing(date))
replace mchiprate = . if !(date != tm(2015m5) | stusps != "MI" | missing(date))
replace mchiprate = . if !(date != tm(2014m5) | stusps != "IA" | missing(date))
replace mchiprate = . if !(date != tm(2015m3) | stusps != "IL" | missing(date))
drop if stusps == "DC"
drop sname

ipolate mchiprate date, generate(mchiprate2) by(st)
replace mchiprate = mchiprate2
drop mchiprate2

replace mchiprate = . if !(date < tm(2017m6) | date > tm(2017m12) | stusps != "NM" | missing(date))
replace mchiprate = . if (date > tm(2016m9) & date < tm(2017m6) & stusps == "OH" & !missing(date))

save Data/Clean/Cleaned_Policy_Data.dta, replace



* gen combined stringency and frequency variable
gen str_and_freq = .
replace str_and_freq = 0 if (freq_check == 0 & str_check == 0)
replace str_and_freq = 1 if (freq_check == 1 | str_check == 1)



* labels 
rename WorkRequirements Work_Reqs
label var cutoffmax     `"Max Income Eligibility Threshold % FPL"'
label var mchiprate `"MCHIP Rate"'
label var Work_Reqs `"Work Requirements"'
label var new_redetpause `"Redetermination Pause"'
label var MedicaidExpansion `"Medicaid Expansion"'
label var GSP `"Gross State Product"'
label var ur `"Unemployment Rate"'
label var automatic_drop `"Automatic Disenrollment"'
label var freq_check `"Check Frequency"'
label var str_check `"Check Stringency"'
label var str_and_freq `"More Stringent or Frequent Checks"'
label var Adminburden `"Increase in Administrative Burden"'
label var Premlevel_201 `"Monthly Premium"'
label var cutoffmax     `"Income Eligibility Threshold"'
label var presumpteligMcaid `"1 if presumptive eligibility; children Medicaid"'
label var presumpteligCHIP `"1 if presumptive eligibility; CHIP; 2 if no separate CHIP"'
label var Adminburden `"Increase in Administrative Burden"'
label var public_charge_2017 `"Trump Administration"'
label var public_charge_2019 `"Public Charge Announcement"'
label var pov `"Poverty Rate"'
label var waittime_CHIP01 `"CHIP Wait Time"'
label var presumpteligCHIP2 `"CHIP Presumptive Eligibility"'
label var presumpteligMcaid `"Medicaid Presumptive Eligibility"'
label var realtimeelig `"Real Time Eligibility"'
label var autorenewal `"Autorenewal"'
label var imm_nowaitMdcd `"No Wait for Immigrants Medicaid"'
label var imm_nowaitCHIP2 `"No Wait for Immigrants CHIP"'


save Data/Clean/Cleaned_Policy_Data.dta, replace



*************************
* Add time since policy change variable for event studies, etc. 
*************************

* Import enrollment policies sheet to create time since policy change variable for the event study
import excel "Data/Policies.xls", sheet("PoliciesforES") firstrow clear
drop Enddate MedicaidExpansion WorkRequirements freq_check	str_check	automatic_drop	
drop if Adminburden == 0
drop if missing(Adminburden)
drop State
rename PostalCode stusps
drop Adminburden
rename Startdate daypolicydate
format daypolicydate %d


* read in the overall policy regression data
merge 1:m stusps using Data/Clean/Cleaned_Policy_Data.dta
drop _merge
* gen daydate =  dofm(date)
format daydate %d
gen timesince = ((year(daydate) - year(daypolicydate)) * 12) + (month(daydate) - month(daypolicydate))
gen yearsince = ((year(daydate) - year(daypolicydate))) 

sort st date

gen year_daypolicydate = year(daypolicydate)

* gen variable for callaway sant'anna package counting time since 2014
gen newdate = ((year(daydate) - 2014) * 12) + (month(daydate) - 1)
gen newpolicydate = ((year(daypolicydate) - 2014) * 12) + (month(daypolicydate) - 1)

gen newpolicydate_year = ((year(daypolicydate) - 2010)) 

save Data/Clean/Cleaned_Policy_Data.dta, replace




*************************
* Create dataset measuring churn
*************************
use Data/Clean/Cleaned_Policy_Data.dta, clear
* The dataset we use for churn measures a year in terms of the federal fiscal year which starts in October
* Recreate fiscalyear variable to merge correctly based on this 
drop fiscalyear
gen fiscalyear = .
replace fiscalyear = year if mo < 10
replace fiscalyear = year + 1 if mo >= 10
drop year
rename fiscalyear year
rename stname statename

merge m:1 st year using Data/Enrollment/churn_data.dta 
keep if _merge == 3
drop _merge
* generate churn variable
gen churn = .
replace churn = (mchip_annualtot - mchip_child)/mchip_annualtot if mo == 9


* collapse by year
collapse (mean) cutoffmax Premlevel_201 ur GSP pov mchip_child (sum)  Adminburden Work_Reqs new_redetpause MedicaidExpansion (max) mchip_annualtot churn, by(st year)


* policy variable takes a value of 1 if in place for more than 6 months 
foreach variable in Adminburden Work_Reqs new_redetpause MedicaidExpansion {

replace `variable' = 0 if `variable' <= 6
replace `variable' = 1 if `variable' > 6

}

gen churn_mean = (mchip_annualtot - mchip_child)/mchip_annualtot

* drop negative observations for churn 
replace churn = . if churn < 0
replace churn_mean = . if churn_mean < 0

label var cutoffmax     `"Max Income Eligibility Threshold % FPL"'
label var Work_Reqs `"Work Requirements"'
label var new_redetpause `"Redetermination Pause"'
label var MedicaidExpansion `"Medicaid Expansion"'
label var GSP `"Gross State Product"'
label var ur `"Unemployment Rate"'
label var Adminburden `"Increase in Administrative Burden"'
label var Premlevel_201 `"Monthly Premium"'
label var cutoffmax     `"Income Eligibility Threshold"'
label var pov `"Poverty Rate"'



save Data/Clean/churn_policy.dta, replace


